Expert(Det) Discounted Return Avg: 1173.31 Std: 28.15
array([1173.646, 1201.897, 1241.899, 1165.564, 1195.389, 1205.245,
       1154.189, 1119.836, 1159.449, 1170.164, 1181.454, 1216.254,
       1166.499, 1154.193, 1094.143, 1141.998, 1191.739, 1204.917,
       1186.826, 1141.529, 1197.239, 1132.225, 1164.92 , 1141.753,
       1137.306, 1162.184, 1175.477, 1182.412, 1161.986, 1187.534,
       1166.526, 1190.614, 1149.009, 1209.673, 1155.851, 1169.031,
       1183.234, 1149.881, 1164.686, 1160.384, 1170.479, 1192.544,
       1211.65 , 1197.933, 1220.765, 1174.259, 1220.316, 1155.349,
       1178.062, 1187.403, 1145.96 , 1132.425, 1177.722, 1142.115,
       1198.151, 1198.279, 1133.336, 1196.709, 1168.587, 1150.766,
       1208.235, 1165.46 , 1219.288, 1137.319])
ordereddict([('seed', 1), ('cuda', -1), ('experiment_tag', 'ant_expert_500'), ('env', ordereddict([('env_name', 'CustomAnt-v0'), ('T', 500)])), ('sac', ordereddict([('epochs', 2000), ('log_step_interval', 5000), ('update_every', 50), ('update_num', 1), ('random_explore_episodes', 10), ('batch_size', 100), ('lr', 0.001), ('alpha', 0.2), ('automatic_alpha_tuning', False), ('buffer_size', 1000000), ('num_test_episodes', 10)])), ('expert', ordereddict([('samples_episode', 64)]))]), 
       1160.384, 1170.479, 1192.544,
       1211.65 , 1197.933, 1220.765, 1174.259, 1220.316, 1155.349,
       1178.062, 1187.403, 1145.96 , 1132.425, 1177.722, 1142.115,
       1198.151, 1198.279, 1133.336, 1196.709, 1168.587, 1150.766,
       1208.235, 1165.46 , 1219.288, 1137.319])
